Using A2C

Using A2C on “CartPole-v0”

import gym

from genrl.agents import A2C
from genrl.trainers import OnPolicyTrainer
from genrl.environments import VectorEnv

# Wrap the "CartPole-v0" task in genrl's VectorEnv.
env = VectorEnv("CartPole-v0")

# A2C agent using the 'mlp' architecture option; the policy and value parts
# are given separate learning rates and layer sizes.
agent = A2C(
    'mlp',
    env,
    gamma=0.9,
    lr_policy=0.01,
    lr_value=0.1,
    policy_layers=(32, 32),
    value_layers=(32, 32),
    rollout_size=2048,
)

# On-policy trainer that logs to stdout and tensorboard, keyed by "Episode".
trainer = OnPolicyTrainer(
    agent,
    env,
    log_mode=['stdout', 'tensorboard'],
    log_key="Episode",
)
trainer.train()

Using A2C on an Atari environment: “Pong-v0”



# Wrap "Pong-v0" in genrl's VectorEnv, selecting the "atari" environment type.
env = VectorEnv("Pong-v0", env_type="atari")

# A2C agent using the 'cnn' architecture option; the policy and value parts
# are given separate learning rates and layer sizes.
agent = A2C(
    'cnn',
    env,
    gamma=0.99,
    lr_policy=0.01,
    lr_value=0.1,
    policy_layers=(32, 32),
    value_layers=(32, 32),
    rollout_size=2048,
)

# On-policy trainer that logs to stdout and tensorboard, keyed by "timestep".
trainer = OnPolicyTrainer(
    agent,
    env,
    log_mode=['stdout', 'tensorboard'],
    log_key="timestep",
)
trainer.train()

More details can be found in the docs for A2C and OnPolicyTrainer.